Basic Descriptives

Ns

# Sample sizes: row counts of the three data sets used throughout this section.
nrow(articles)
## [1] 13136
nrow(sources)
## [1] 77454
nrow(authors)
## [1] 1842

Articles

Number Over Time by Outlet

# Yearly article counts per newspaper, drawn as one line per outlet.
# `.groups = "drop"` returns an ungrouped summary and silences dplyr's
# "summarise() has grouped output" message.
articles_per_year <- articles %>%
    group_by(year, source) %>%
    summarize(n = n(), .groups = "drop")

# theme_min is defined outside this section; it is applied last, after theme_bw().
n_articles_plot <- ggplot(articles_per_year, aes(x = year, y = n, color = source)) +
    geom_line() +
    theme_bw() +
    scale_x_continuous(breaks = seq(2012, 2022, by = 2)) +
    labs(
        x = "Year",
        y = "Number of Articles",
        color = "Newspaper"
    ) +
    theme_min
n_articles_plot

# Pass the plot explicitly rather than relying on ggplot2::last_plot().
ggsave(
    here("paper/figures/time_trend_n_articles.png"),
    plot = n_articles_plot, width = 6, height = 4
)

Sources

Number per Article

# Distribution of the number of sources cited per article.
# hist() is called directly instead of through %>% so that its default title
# and x-axis label are not built from the pipe placeholder "."; explicit
# labels are supplied as well.
hist(
    articles$n_srcs,
    main = "Sources per article",
    xlab = "Number of sources"
)

summary(articles$n_srcs)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   4.000   6.000   5.896   8.000  31.000

Number per Article over Time

# Average sources per article by year: rows of `sources` in a year divided by
# the number of distinct `filename` values in that year.
sources %>%
    group_by(year) %>%
    summarize(
        n = n(),
        n_articles = n_distinct(filename)
    ) %>%
    mutate(avg_sources = n / n_articles) %>%
    ggplot(aes(x = year, y = avg_sources)) +
    geom_line() +
    scale_x_continuous(breaks = seq(2012, 2022, 2)) +
    theme_min

# OLS of sources-per-article on the post2016 indicator (no fixed effects),
# rendered as an HTML table. The call is kept as written because the table's
# column header is derived from the call text.
etable(feols(n_srcs ~ post2016, data=articles)) %>% htmlTable
feols(n_srcs ~ p..
1 Dependent Var.: n_srcs
2
3 Constant 5.272*** (0.0492)
4 post2016TRUE 0.8374*** (0.0570)
5 _______________ __________________
6 S.E. type IID
7 Observations 13,136
8 R2 0.01617
9 Adj. R2 0.01610

Number per Outlet

# Per-newspaper totals: cited sources (n), distinct articles (n_articles),
# and their ratio (avg_sources).
sources %>%
    group_by(source) %>%
    summarize(
        n = n(),
        n_articles = n_distinct(filename)
    ) %>%
    mutate(avg_sources = n / n_articles) %>%
    kable()
source n n_articles avg_sources
Chicago Tribune 3052 578 5.280277
Los Angeles Times 10173 1796 5.664254
New York Times 28282 4671 6.054806
USA Today 3397 684 4.966374
Wall Street Journal 12081 2286 5.284777
Washington Post 20469 3121 6.558475
# Average sources per article for each newspaper and year, one line per outlet.
# `.groups = "drop"` avoids the leftover grouping (and the accompanying
# message) that summarize() produces after a two-variable group_by().
sources %>%
    group_by(source, year) %>%
    summarize(
        n = n(),
        n_articles = n_distinct(filename),
        avg_sources = n / n_articles,
        .groups = "drop"
    ) %>%
    ggplot(aes(x = year, y = avg_sources, color = source)) +
    geom_line() +
    theme_min

# The example articles
# filter(str_detect(filename, "1270322870.xml|1731405273.xml")) %>%

Source Types (Table 1)

# Table 1: share of cited sources by general category, followed by a finer
# breakdown (category2) that splits sources by environmental / fossil-fuel
# affiliation and politicians by party.

# Turn a vector of labels into a two-column data frame (Category, Freq), where
# Freq is the share of each label formatted as a percentage with one decimal.
share_table <- function(labels) {
    table(labels) %>%
        prop.table() %>%
        as.data.frame() %>%
        rename(Category = 1) %>%
        mutate(
            Freq = paste0(round(100 * Freq, digits = 1), "%")
        )
}

gen.cats <- share_table(sources$category)

# Branches are evaluated in order; a missing comparison counts as "no match".
sources$category2 <- with(sources, case_when(
    env_category == "environmental" ~ paste(category, "- Environmental"),
    env_category == "fossil fuel" ~ paste(category, "- Fossil Fuel"),
    category == "Politician" & pol_party == "Democrat" ~ "Politician - Democrat",
    category == "Politician" & pol_party == "Republican" ~ "Politician - Republican",
    # Every remaining politician (party is neither Democrat nor Republican, or
    # is missing). The condition used to repeat `category == "Politician"` on
    # both sides of `&`; a single test is equivalent.
    # NOTE(review): confirm that "International" is the right label for all of them.
    category == "Politician" ~ "Politician - International",
    category == "Citizen" ~ "Other",
    category == "Advocacy" ~ "Advocacy - Other",
    category == "Business" ~ "Business - Other",
    TRUE ~ category
))
sub.cats <- share_table(sources$category2)

# Stack both tables; distinct() removes rows that appear identically in both
# (categories that have no sub-breakdown).
rbind(gen.cats, sub.cats) %>% distinct() %>% kable()
Category Freq
Academic 10.2%
Advocacy 21.5%
Bureaucrat 12.4%
Business 15%
International 11.3%
Media 5.6%
Other 4.1%
Politician 19.8%
Advocacy - Environmental 13.7%
Advocacy - Fossil Fuel 0.4%
Advocacy - Other 7.5%
Business - Environmental 2%
Business - Fossil Fuel 4.3%
Business - Other 8.6%
Politician - Democrat 12%
Politician - International 2.8%
Politician - Republican 5%

Distribution of source types by newspaper

# Stacked horizontal bars: composition of source categories within each
# newspaper (each bar sums to 1).
category_levels <- c(
    "Academic",
    "Advocacy",
    "Business",
    "Bureaucrat",
    "Politician",
    "International",
    "Media",
    "Other"
)

category_shares <- sources %>%
    mutate(category = factor(category, levels = category_levels)) %>%
    group_by(category, source) %>%
    # Drop the grouping explicitly so no stale grouping (or dplyr message)
    # carries into the per-newspaper share computation.
    summarize(n = n(), .groups = "drop") %>%
    group_by(source) %>%
    mutate(prop = n / sum(n)) %>%
    ungroup()

source_dist_plot <- ggplot(
    category_shares,
    aes(x = reorder(source, n), fill = fct_rev(category), y = prop)
) +
    # geom_col() is the idiomatic form of geom_bar(stat = "identity").
    geom_col() +
    theme_bw() +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = "bottom") +
    labs(x = "", y = "Prop", fill = "Category") +
    scale_fill_brewer(palette = "Set2", direction = -1) +
    coord_flip() +
    guides(fill = guide_legend(reverse = TRUE))
source_dist_plot

# Pass the plot explicitly rather than relying on ggplot2::last_plot().
ggsave(here("paper/figures/source-dist.png"), plot = source_dist_plot, width = 8, height = 6)

# Collapse the slant categories that are not broken out separately in the
# slant figure into "Other", then fix the display order via factor levels.
collapsed_to_other <- c(
    "Bureaucrat", "International", "Politician",
    "Academic", "Media", "Advocacy"
)
slant2_levels <- c(
    "Environmental",
    "Business",
    "Fossil Fuel",
    "Democrat",
    "Republican",
    "Other"
)
sources.s2 <- sources %>%
    mutate(
        category.slant2 = case_when(
            category.slant %in% collapsed_to_other ~ "Other",
            TRUE ~ category.slant
        ),
        category.slant2 = factor(category.slant2, levels = slant2_levels)
    )
# Stacked horizontal bars: composition of collapsed slant categories within
# each newspaper (each bar sums to 1), with a fixed colour per category.
slant_shares <- sources.s2 %>%
    group_by(category.slant2, source) %>%
    # Drop the grouping explicitly so no stale grouping (or dplyr message)
    # carries into the per-newspaper share computation.
    summarize(n = n(), .groups = "drop") %>%
    group_by(source) %>%
    mutate(prop = n / sum(n)) %>%
    ungroup()

source_dist_2_plot <- ggplot(
    slant_shares,
    aes(x = reorder(source, n), fill = fct_rev(category.slant2), y = prop)
) +
    # geom_col() is the idiomatic form of geom_bar(stat = "identity").
    geom_col() +
    scale_fill_manual(values = c(
        "Environmental" = "#91bfdb",
        "Democrat" = "#4575b4",
        "Republican" = "#d73027",
        "Fossil Fuel" = "#fc8d59",
        "Business" = "#fee090",
        "Other" = "#ffffbf"
    )) +
    theme_bw() +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = "bottom") +
    labs(x = "", y = "Prop", fill = "Category") +
    coord_flip() +
    guides(fill = guide_legend(reverse = TRUE))
source_dist_2_plot

# Pass the plot explicitly rather than relying on ggplot2::last_plot().
ggsave(here("paper/figures/source-dist-2.png"), plot = source_dist_2_plot, width = 8, height = 6)

# Overall share of each collapsed slant category across all cited sources
# (proportions, not percentages). Kept in piped form: the "." column header
# in the rendered table comes from the pipe placeholder.
sources.s2$category.slant2 %>%
    table() %>%
    prop.table() %>% 
    kable
. Freq
Environmental 0.1536783
Business 0.0844501
Fossil Fuel 0.0465050
Democrat 0.1446923
Republican 0.0551811
Other 0.5154931

DIME correlates to category

# OLS of the DIME score (cfscore) on slant category; only sources with a
# non-missing cfscore enter the estimation (23,876 observations in the output).
# NOTE(review): the recorded output has no coefficient for Republican, so the
# estimates appear to be differences relative to Republican sources — confirm
# the reference category before interpreting the signs.
feols(cfscore ~ category.slant, data = sources) %>% 
    etable %>% 
    htmlTable
.
1 Dependent Var.: cfscore
2
3 Constant 0.8953*** (0.0096)
4 category.slantAdvocacy -0.9919*** (0.0152)
5 category.slantBusiness -0.7901*** (0.0144)
6 category.slantDemocrat -1.468*** (0.0117)
7 category.slantEnvironmental -1.716*** (0.0130)
8 category.slantFossilFuel -0.5482*** (0.0152)
9 category.slantInternational -1.378*** (0.0856)
10 category.slantOther -0.9430*** (0.0239)
11 ___________________________ ___________________
12 S.E. type IID
13 Observations 23,876
14 R2 0.50244
15 Adj. R2 0.50229

Top Source DIME scores

# Top 50 organizations by number of citations, with their DIME scores.
# Rows whose cfscore_src is "politician" (or missing) and rows without an
# org_id are excluded before aggregating to one row per (org_id, cfscore.i2).
dt <- sources %>%
    filter(cfscore_src != "politician") %>%
    filter(!is.na(org_id)) %>%
    group_by(org_id, cfscore.i2) %>%
    summarize(
        n = n(),
        cfscore = fmode(cfscore),
        name = first(organization_name),
        category.slant = first(category.slant),
        # Without this the result stays grouped by org_id, and row_number()
        # below would restart inside every organization instead of ranking
        # the 50 rows.
        .groups = "drop"
    ) %>%
    arrange(desc(n)) %>%
    filter(!is.na(cfscore)) %>%
    head(50) %>%
    arrange(n) %>%
    mutate(rn = 51 - row_number(), n_size = n + 10)

# Label-only scatter of organizations.
#   data:      one row per organization, as built in `dt`
#   score_col: name (string) of the ideology column used for x and colour
# x is the ideology score, y the citation count on a log scale; each label is
# sized by sqrt(n) and coloured by the score. Returns a ggplot object.
dime_label_plot <- function(data, score_col) {
    unlabeled_names <- c("The Associated Press", "Associated Press", "California")
    labelled <- data %>%
        filter(
            # category.slant values are capitalised ("Other"), so the earlier
            # comparison against lowercase "other" never excluded anything.
            tolower(category.slant) != "other",
            !(name %in% unlabeled_names)
        )

    ggplot(
        data,
        aes(x = .data[[score_col]], size = sqrt(n), y = n, color = .data[[score_col]])
    ) +
        theme_bw() +
        scale_y_log10() +
        scale_x_continuous(limits = c(-1.5, 1.2)) +
        scale_color_gradient(low = "blue", high = "red") +
        geom_vline(xintercept = 0, lty = "dashed") +
        theme(panel.grid = element_blank(), axis.text.y = element_blank()) +
        labs(x = "DIME Ideology", y = "") +
        geom_text_repel(
            data = labelled,
            aes(x = .data[[score_col]], label = name),
            direction = "y",
            min.segment.length = 999
        ) +
        guides(size = "none", color = "none", fill = "none")
}

dime_plot <- dime_label_plot(dt, "cfscore")
dime_plot

ggsave(here("paper/figures/dime-scatter-100.png"), plot = dime_plot, width = 10, height = 6)

dime_plot_i2 <- dime_label_plot(dt, "cfscore.i2")
dime_plot_i2

ggsave(here("paper/figures/dime-scatter-100-i2.png"), plot = dime_plot_i2, width = 10, height = 6)

Top DIME over time

# Yearly citation counts for the seven most-cited organizations in `dt`,
# smoothed, coloured by whether the organization's cfscore is below zero and
# with one line type per organization.
dt7 <- dt %>%
    arrange(desc(n)) %>%
    head(7)

sources %>%
    filter(org_id %in% dt7$org_id) %>%
    group_by(year, organization_name, org_id, cfscore) %>%
    # Ungrouped result; also silences the "grouped output" message.
    summarize(n = n(), .groups = "drop") %>%
    ggplot(aes(
        x = year, y = n, color = cfscore < 0, lty = organization_name
    )) +
    # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
    geom_smooth(se = FALSE) +
    theme_min

Politicians

# Tag each cited source as one of three presidents (matched on person_name)
# or, failing that, by party slant; everything else is left missing.
pres <- sources %>%
    mutate(
        pres = case_when(
            str_detect(person_name, "Biden") ~ "Biden",
            str_detect(person_name, "Obama") ~ "Obama",
            str_detect(person_name, "Trump") ~ "Trump",
            category.slant %in% c("Democrat", "Republican") ~ category.slant,
            # Typed missing value keeps the result character on any dplyr version.
            TRUE ~ NA_character_
        )
    )

# Total number of cited sources per year, used to normalise the counts.
npy <- sources %>%
    group_by(year) %>%
    summarize(npy = n())

# Yearly counts per tag, computed once and shared by both panels.
pres_counts <- pres %>%
    filter(!is.na(pres)) %>%
    group_by(year, pres) %>%
    summarize(n = n(), .groups = "drop")

ggarrange(plotlist = list(
    # Raw counts.
    ggplot(pres_counts, aes(x = year, y = n, color = pres)) +
        geom_line() +
        theme_min,
    # Normalize by # of sources per year...
    pres_counts %>%
        left_join(npy, by = "year") %>%
        ggplot(aes(x = year, y = n / npy, color = pres, shape = pres)) +
        geom_line() +
        geom_point() +
        scale_x_continuous(breaks = seq(2012, 2022, 4)) +
        theme_min
), common.legend = TRUE, legend = "bottom")

Journalists

Demographics

# Grid of univariate distributions for the journalist (author) attributes:
# a histogram for estimated age and count bars for the remaining variables.
# geom_bar() counts rows by default, so no `stat` argument is needed.
demographic_panels <- list(
    ggplot(authors, aes(x = age_est_2017)) + geom_histogram(),
    ggplot(authors, aes(x = elite_undergrad_ivyplus)) + geom_bar(),
    ggplot(authors, aes(x = edu.has_postgrad)) + geom_bar(),
    ggplot(authors, aes(x = field.journo)) + geom_bar(),
    ggplot(authors, aes(x = gender)) + geom_bar(),
    ggplot(authors, aes(x = race.nonwhite)) + geom_bar()
)
ggarrange(plotlist = demographic_panels)

Demographics over time

Unit of analysis: unique journalists per year

# One row per journalist-year (with the journalist attributes carried along
# as grouping keys); n is the number of articles in that journalist-year.
journo.years <- articles %>%
    group_by(
        author_name, year, elite_undergrad_ivyplus, edu.undergrad,
        edu.has_postgrad, is_career, field.journo, age_est, gender,
        race.nonwhite
    ) %>%
    # Ungrouped result; also silences the "grouped output" message.
    summarize(n = n(), .groups = "drop") %>%
    filter(!is.na(author_name))

# Loess-smoothed yearly trend of a 0/1 indicator across journalist-years.
#   data:      journalist-year table
#   indicator: unquoted logical expression or column evaluated in `data`
#   y_label:   y-axis label
share_trend <- function(data, indicator, y_label) {
    ggplot(data, aes(x = year, y = as.numeric({{ indicator }}))) +
        geom_smooth(method = "loess") +
        labs(x = "Year", y = y_label)
}

age.plt <- share_trend(journo.years, age_est > 35, "Age > 35")
ivy.plt <- share_trend(journo.years, elite_undergrad_ivyplus, "Ivy League")
fj.plt <- share_trend(journo.years, field.journo, "Journalism Degree")
pg.plt <- share_trend(journo.years, edu.has_postgrad, "Postgraduate Degree")
gender.plt <- share_trend(journo.years, gender == "female", "Female")
race.plt <- share_trend(journo.years, race.nonwhite, "Nonwhite")

# Shared look for every panel; panel order matches the saved figure.
plts <- list(age.plt, ivy.plt, fj.plt, gender.plt, pg.plt, race.plt) %>%
    lapply(function(p) {
        p +
            theme_bw() +
            theme(panel.grid = element_blank()) +
            scale_x_continuous(breaks = seq(2012, 2022, 2))
    })
journo_grid <- ggarrange(plotlist = plts)
journo_grid

# Save the arranged grid explicitly instead of relying on ggplot2::last_plot().
ggsave(here("paper/figures/journo-agg-2.png"), plot = journo_grid, width = 10, height = 6)

Ideological Sources

# Yearly number of cited sources per slant category, computed once and used
# for both the raw-count and the share-of-year plots.
slant_counts <- sources %>%
    group_by(year, category.slant) %>%
    summarize(n = n(), .groups = "drop")

# Raw counts, smoothed. FALSE is spelled out because `F` can be reassigned.
ggplot(slant_counts, aes(x = year, y = n, color = category.slant, lty = category.slant)) +
    geom_smooth(se = FALSE) +
    theme_min

# Total cited sources per year (denominator for the shares below).
sources_per_year <- sources %>%
    group_by(year) %>%
    summarize(year.n = n())

# Share of each year's sources by slant category; the join key is explicit.
slant_counts %>%
    left_join(sources_per_year, by = "year") %>%
    ggplot(aes(x = year, y = n / year.n, color = category.slant, lty = category.slant)) +
    geom_line() +
    theme_min

Coverage/data quality

Articles with DIME and journalist information

# Article-level table produced from the source table by calculate_balance()
# (defined outside this section), then narrowed to articles with a known
# author and, within those, a non-missing cfscore.
articles.b <- calculate_balance(sources, types = c("org_category", "cfscore"))
articles.with.author <- filter(articles.b, !is.na(author))
articles.with.author.cfscore <- filter(articles.with.author, !is.na(cfscore))
nrow(articles.with.author)
## [1] 12348
nrow(articles.with.author.cfscore)
## [1] 9915

DIME coverage

# Share of all cited sources that have an observed DIME score.
mean(!is.na(sources$cfscore))
## [1] 0.3082604
# Same share when imputed scores are counted as well.
mean(!is.na(sources$cfscore.impute))
## [1] 0.5452656
# Score availability (rows) by slant category (columns).
has_cfscore <- !is.na(sources$cfscore)
table(has_cfscore, sources$category.slant, dnn = NULL)
##        
##         Academic Advocacy Bureaucrat Business Democrat Environmental
##   FALSE     7506     2981       9635     3647     3658          7638
##   TRUE         0     2384          0     2894     7549          4265
##        
##         Fossil Fuel International Media Other Politician Republican
##   FALSE        1180          6829  4318  3955       1578        653
##   TRUE         2422            46     0   695          0       3621
# Denominator for "eligible" coverage: sources whose slant category is one of
# advocacy, environmental, fossil fuel, business, democrat or republican
# (matched case-insensitively).
n_eligible <- sources %>%
    filter(tolower(category.slant) %in% c("advocacy", "environmental", "fossil fuel", "business", "democrat", "republican")) %>%
    nrow()
# Excludes every other slant category; in the cross-tab printed above these are
# academic, bureaucrat, international, media, other and politician.
# NOTE(review): the numerators below count scored sources in ALL categories
# (the cross-tab shows some scored International and Other sources), while the
# denominator counts eligible categories only, so these ratios overstate
# coverage among eligible sources — confirm whether the numerators should be
# restricted to the same categories.
nrow(sources %>% filter(!is.na(cfscore))) / n_eligible
## [1] 0.5566539
nrow(sources %>% filter(!is.na(cfscore.impute))) / n_eligible
## [1] 0.9846358

Citation Topics Are Correlated with Source Types, Politician Parties, Ideology

# Linear probability models (OLS on a TRUE/FALSE outcome): whether a citation's
# topic is Business / Environment / Policy as a function of the source's slant
# category. One column per topic in the rendered table.
topic_by_slant <- list(
    feols(src_topic == "Business" ~ category.slant, data = sources),
    feols(src_topic == "Environment" ~ category.slant, data = sources),
    feols(src_topic == "Policy" ~ category.slant, data = sources)
)
htmlTable(etable(topic_by_slant))
model 1 model 2 model 3
1 Dependent Var.: src_topic==“Business” src_topic==“Environment” src_topic==“Policy”
2
3 Constant 0.1106*** (0.0038) 0.3392*** (0.0044) 0.4301*** (0.0055)
4 category.slantAdvocacy 0.0410*** (0.0059) -0.1800*** (0.0068) 0.1183*** (0.0085)
5 category.slantBureaucrat 0.0011 (0.0051) -0.0952*** (0.0059) 0.1068*** (0.0073)
6 category.slantBusiness 0.2725*** (0.0056) -0.2082*** (0.0065) -0.0638*** (0.0080)
7 category.slantDemocrat -0.0583*** (0.0049) -0.2378*** (0.0057) 0.3641*** (0.0071)
8 category.slantEnvironmental 0.0445*** (0.0049) -0.0652*** (0.0056) 0.0975*** (0.0070)
9 category.slantFossilFuel 0.3539*** (0.0067) -0.2701*** (0.0077) 0.0025 (0.0096)
10 category.slantInternational -0.0397*** (0.0055) -0.1318*** (0.0064) 0.2108*** (0.0079)
11 category.slantMedia -0.0325*** (0.0063) -0.1852*** (0.0073) -0.0308*** (0.0090)
12 category.slantOther -0.0179** (0.0061) -0.0994*** (0.0071) -0.0017 (0.0088)
13 category.slantPolitician -0.0656*** (0.0091) -0.2961*** (0.0106) 0.4141*** (0.0131)
14 category.slantRepublican -0.0535*** (0.0063) -0.3067*** (0.0073) 0.4031*** (0.0091)
15 ___________________________ _____________________ ________________________ ___________________
16 S.E. type IID IID IID
17 Observations 77,454 77,454 77,454
18 R2 0.10390 0.05104 0.09159
19 Adj. R2 0.10378 0.05090 0.09146
# Same three topic indicators regressed on env_category. Sources with a
# missing env_category drop out of the estimation (15,818 observations in the
# recorded output).
topic_by_env <- list(
    feols(src_topic == "Business" ~ env_category, data = sources),
    feols(src_topic == "Environment" ~ env_category, data = sources),
    feols(src_topic == "Policy" ~ env_category, data = sources)
)
htmlTable(etable(topic_by_env))
model 1 model 2 model 3
1 Dependent Var.: src_topic==“Business” src_topic==“Environment” src_topic==“Policy”
2
3 Constant 0.1545*** (0.0036) 0.2713*** (0.0037) 0.5313*** (0.0045)
4 env_categoryfossilfuel 0.3066*** (0.0075) -0.2028*** (0.0077) -0.0953*** (0.0094)
5 ______________________ _____________________ ________________________ ___________________
6 S.E. type IID IID IID
7 Observations 15,818 15,818 15,818
8 R2 0.09579 0.04208 0.00647
9 Adj. R2 0.09573 0.04202 0.00641
# Same three topic indicators regressed on politician party. Sources with a
# missing pol_party drop out of the estimation (18,122 observations in the
# recorded output).
topic_by_party <- list(
    feols(src_topic == "Business" ~ pol_party, data = sources),
    feols(src_topic == "Environment" ~ pol_party, data = sources),
    feols(src_topic == "Policy" ~ pol_party, data = sources)
)
htmlTable(etable(topic_by_party))
model 1 model 2 model 3
1 Dependent Var.: src_topic==“Business” src_topic==“Environment” src_topic==“Policy”
2
3 Constant 0.0519*** (0.0021) 0.0982*** (0.0025) 0.7978*** (0.0037)
4 pol_partyOther 0.0271*** (0.0058) 0.0115. (0.0069) -0.0781*** (0.0100)
5 pol_partyRepublican 0.0035 (0.0040) -0.0652*** (0.0048) 0.0377*** (0.0070)
6 ___________________ _____________________ ________________________ ___________________
7 S.E. type IID IID IID
8 Observations 18,122 18,122 18,122
9 R2 0.00122 0.01106 0.00601
10 Adj. R2 0.00111 0.01095 0.00590
# Same three topic indicators regressed on the continuous DIME score; only
# sources with a non-missing cfscore are used (23,876 observations in the
# recorded output).
topic_by_cfscore <- list(
    feols(src_topic == "Business" ~ cfscore, data = sources),
    feols(src_topic == "Environment" ~ cfscore, data = sources),
    feols(src_topic == "Policy" ~ cfscore, data = sources)
)
htmlTable(etable(topic_by_cfscore))
model 1 model 2 model 3
1 Dependent Var.: src_topic==“Business” src_topic==“Environment” src_topic==“Policy”
2
3 Constant 0.1655*** (0.0024) 0.1175*** (0.0022) 0.6534*** (0.0031)
4 cfscore 0.0485*** (0.0029) -0.0491*** (0.0026) -0.0078* (0.0038)
5 _______________ _____________________ ________________________ ___________________
6 S.E. type IID IID IID
7 Observations 23,876 23,876 23,876
8 R2 0.01182 0.01472 0.00018
9 Adj. R2 0.01178 0.01467 0.00014
# Reverse direction: mean DIME score by citation topic. In the recorded output
# Business has no coefficient, so the Constant is the Business mean and the
# other rows are differences from it.
etable(feols(cfscore ~ src_topic, data = sources))
##                      feols(cfscore ~ s..
## Dependent Var.:                  cfscore
##                                         
## Constant              0.0495*** (0.0132)
## src_topicEnvironment -0.4679*** (0.0198)
## src_topicOther       -0.1150*** (0.0247)
## src_topicPolicy      -0.2135*** (0.0147)
## ____________________ ___________________
## S.E. type                            IID
## Observations                      23,876
## R2                               0.02366
## Adj. R2                          0.02353
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Non-policy articles cite fewer ideological sources

# Mean-centre the DIME score on the full source table (`all.sources` is
# created outside this section). TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
all.sources$cfscore_std <- all.sources$cfscore - mean(all.sources$cfscore, na.rm = TRUE)
# Article-level balance measures from calculate_balance() (defined elsewhere).
all.sources.balance <- all.sources %>% calculate_balance(types = c("cfscore", "org_category"))
# Number of left / right sources per article as a function of the policy label.
# NOTE(review): the recorded table lists only a Constant for both models, so
# policy_label_gpt appears to have been dropped from the estimation — check
# that the variable varies in all.sources.balance.
models <-
    list(
        feols(n_left ~ policy_label_gpt, data = all.sources.balance),
        feols(n_right ~ policy_label_gpt, data = all.sources.balance)
    )
etable(models) %>% htmlTable
model 1 model 2
1 Dependent Var.: n_left n_right
2
3 Constant 1.208*** (0.0117) 0.5336*** (0.0084)
4 _______________ _________________ __________________
5 S.E. type IID IID
6 Observations 13,136 13,136

NYT comment counts

# New York Times articles only; n_comments is the reader comment count.
nyt.articles <- articles.b %>% filter(source == "New York Times")

# Comments over time, coloured by article cfscore (blue = low, red = high).
nyt.articles %>% ggplot(aes(x = date, y = n_comments, color = cfscore)) +
    geom_point(alpha = 0.3, size = 10) +
    scale_color_gradient(low = "blue", high = "red")

# Comments over time, coloured by the balance indicator.
nyt.articles %>% ggplot(aes(x = date, y = n_comments, color = balance)) +
    geom_point()

hist(nyt.articles$n_comments)

# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
sd(nyt.articles$n_comments, na.rm = TRUE)
## [1] 512.5869
# How many NYT articles have a comment count at all (FALSE = count present).
table(nyt.articles$n_comments %>% is.na())
## 
## FALSE  TRUE 
##  1363  3308
# Comment count against article cfscore, with ggplot2's default smoother.
comments_slant_plot <- ggplot(nyt.articles, aes(x = cfscore, y = n_comments)) +
    geom_point() +
    geom_smooth() +
    theme_bw()
comments_slant_plot

# Pass the plot explicitly rather than relying on ggplot2::last_plot().
ggsave(
    here("paper/figures/nyt-comments-slant-scatter.png"),
    plot = comments_slant_plot, width = 6, height = 5
)
# Bivariate OLS of comment count on cfscore.
summary(feols(n_comments ~ cfscore, data = nyt.articles))
## OLS estimation, Dep. Var.: n_comments
## Observations: 1,193
## Standard-errors: IID 
##             Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept) 371.5255    16.7083 22.23596 < 2.2e-16 ***
## cfscore      55.2263    30.4885  1.81138  0.070334 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 502.1   Adj. R2: 0.00191
# Comment count regressed on journalist characteristics (NYT articles only).
m <- feols(
    n_comments ~ elite_undergrad_ivyplus + age_est_2017 + race.nonwhite +
        gender + field.journo + edu.has_postgrad,
    data = nyt.articles
)
htmlTable(etable(m))
m
1 Dependent Var.: n_comments
2
3 Constant 426.6*** (74.89)
4 elite_undergrad_ivyplusTRUE 56.27 (36.95)
5 age_est_2017 -0.6828 (1.708)
6 race.nonwhiteTRUE -42.77 (53.97)
7 gendermale -56.49. (33.13)
8 field.journoTRUE 204.5*** (47.33)
9 edu.has_postgradTRUE -95.68** (31.52)
10 ___________________________ ________________
11 S.E. type IID
12 Observations 1,225
13 R2 0.03291
14 Adj. R2 0.02815

What articles get more comments?

# Journalist characteristics with a logged outcome. In the output below,
# Ivy-plus undergrad, journalism degree and postgraduate degree are significant
# at the 5% level; age, race and gender are not.
# NOTE(review): this model has fewer observations than the same model in levels
# (1,208 vs 1,225) — presumably articles with zero comments, where
# log(n_comments) is -Inf and the row is dropped; confirm.
summary(feols(log(n_comments) ~ elite_undergrad_ivyplus + age_est_2017 + race.nonwhite + gender + field.journo + edu.has_postgrad, data = nyt.articles))
## OLS estimation, Dep. Var.: log(n_comments)
## Observations: 1,208
## Standard-errors: IID 
##                              Estimate Std. Error   t value   Pr(>|t|)    
## (Intercept)                  5.424881   0.196382 27.624160  < 2.2e-16 ***
## elite_undergrad_ivyplusTRUE  0.235508   0.096537  2.439571 1.4849e-02 *  
## age_est_2017                -0.003774   0.004536 -0.832097 4.0552e-01    
## race.nonwhiteTRUE           -0.096726   0.140627 -0.687818 4.9170e-01    
## gendermale                  -0.104422   0.086222 -1.211083 2.2610e-01    
## field.journoTRUE             0.475912   0.122880  3.872993 1.1330e-04 ***
## edu.has_postgradTRUE        -0.456227   0.082224 -5.548563 3.5408e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.35136   Adj. R2: 0.049822
# Takeaway from the models that follow: articles with higher (more rightward)
# cfscore draw more comments, while balance itself is not significant.
# How to interpret this is still unclear.
# First, balance on its own:
summary(feols(n_comments ~ balance, data = nyt.articles))
## OLS estimation, Dep. Var.: n_comments
## Observations: 1,087
## Standard-errors: IID 
##             Estimate Std. Error   t value  Pr(>|t|)    
## (Intercept) 361.2239    19.9166 18.136804 < 2.2e-16 ***
## balanceTRUE  19.6879    32.5088  0.605619   0.54489    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 518.5   Adj. R2: -5.834e-4
# Article cfscore and balance together.
summary(feols(n_comments ~ cfscore + balance, data = nyt.articles))
## OLS estimation, Dep. Var.: n_comments
## Observations: 1,085
## Standard-errors: IID 
##              Estimate Std. Error   t value  Pr(>|t|)    
## (Intercept) 386.84998    23.3908 16.538559 < 2.2e-16 ***
## cfscore      66.41133    32.5687  2.039113  0.041681 *  
## balanceTRUE   3.54252    33.3777  0.106134  0.915495    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 517.9   Adj. R2: 0.002302
# Same specification with a logged outcome.
# NOTE(review): 1,069 observations vs 1,085 in levels — presumably zero-comment
# articles dropped because log(0) is -Inf; confirm.
summary(feols(log(n_comments) ~ cfscore + balance, data = nyt.articles))
## OLS estimation, Dep. Var.: log(n_comments)
## Observations: 1,069
## Standard-errors: IID 
##             Estimate Std. Error   t value  Pr(>|t|)    
## (Intercept) 5.214308   0.062564 83.342947 < 2.2e-16 ***
## cfscore     0.253084   0.087095  2.905837 0.0037384 ** 
## balanceTRUE 0.020648   0.088885  0.232299 0.8163503    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.36934   Adj. R2: 0.006801
# Adding n as a control: its coefficient is positive and highly significant,
# while the cfscore estimate barely changes.
# NOTE(review): n is presumably the number of sources per article as returned
# by calculate_balance() — confirm.
summary(feols(n_comments ~ cfscore + balance + n, data = nyt.articles))
## OLS estimation, Dep. Var.: n_comments
## Observations: 1,085
## Standard-errors: IID 
##             Estimate Std. Error   t value   Pr(>|t|)    
## (Intercept) 209.6002   50.82554  4.123915 4.0103e-05 ***
## cfscore      65.4563   32.35542  2.023039 4.3315e-02 *  
## balanceTRUE -21.7149   33.77798 -0.642871 5.2044e-01    
## n            24.9750    6.36918  3.921225 9.3648e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 514.2   Adj. R2: 0.015384
# Logged outcome with the same controls (cfscore, balance, n).
summary(feols(log(n_comments) ~ cfscore + balance + n, data = nyt.articles))
## OLS estimation, Dep. Var.: log(n_comments)
## Observations: 1,069
## Standard-errors: IID 
##              Estimate Std. Error   t value   Pr(>|t|)    
## (Intercept)  4.541096   0.134595 33.738980  < 2.2e-16 ***
## cfscore      0.251914   0.085869  2.933712 3.4211e-03 ** 
## balanceTRUE -0.074135   0.089237 -0.830760 4.0630e-01    
## n            0.094664   0.016822  5.627525 2.3364e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.34942   Adj. R2: 0.034576
# Replace n with the post2016 indicator as the additional control.
summary(feols(n_comments ~ cfscore + balance + post2016, data = nyt.articles))
## OLS estimation, Dep. Var.: n_comments
## Observations: 1,085
## Standard-errors: IID 
##               Estimate Std. Error  t value   Pr(>|t|)    
## (Intercept)  333.33012    45.6382 7.303745 5.4209e-13 ***
## cfscore       64.74770    32.5785 1.987435 4.7126e-02 *  
## balanceTRUE    6.37288    33.4287 0.190641 8.4884e-01    
## post2016TRUE  60.90065    44.5990 1.365517 1.7237e-01    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 517.4   Adj. R2: 0.003098
# No notable interaction between right-leaning sourcing (cfscore > 0) and the
# post-2016 period: the interaction term is far from significant.
summary(feols(n_comments ~ (cfscore > 0) * post2016, data = nyt.articles))
## OLS estimation, Dep. Var.: n_comments
## Observations: 1,193
## Standard-errors: IID 
##                              Estimate Std. Error  t value   Pr(>|t|)    
## (Intercept)                  313.8239    42.1182 7.451035 1.7762e-13 ***
## cfscore > 0TRUE               65.1438    99.4974 0.654729 5.1277e-01    
## post2016TRUE                  26.7044    45.6526 0.584948 5.5869e-01    
## cfscore > 0TRUE:post2016TRUE  39.8135   106.8691 0.372544 7.0955e-01    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 501.1   Adj. R2: 0.004551
# Same interaction model with a logged outcome.
summary(feols(log(n_comments) ~ (cfscore > 0) * post2016, data = nyt.articles))
## OLS estimation, Dep. Var.: log(n_comments)
## Observations: 1,173
## Standard-errors: IID 
##                               Estimate Std. Error   t value  Pr(>|t|)    
## (Intercept)                   5.369099   0.120050 44.723830 < 2.2e-16 ***
## cfscore > 0TRUE               0.237157   0.290408  0.816632 0.4143050    
## post2016TRUE                 -0.367322   0.129405 -2.838541 0.0046104 ** 
## cfscore > 0TRUE:post2016TRUE  0.102424   0.309573  0.330856 0.7408128    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.37169   Adj. R2: 0.01348
# Comment counts over time with a separate linear trend per balance group.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
nyt.articles %>% ggplot(aes(x = date, y = n_comments, color = balance)) +
    geom_smooth(se = FALSE, method = "lm") +
    geom_point()

# Same plot, split by whether the article's cfscore is above zero.
nyt.articles %>% ggplot(aes(x = date, y = n_comments, color = cfscore > 0)) +
    geom_smooth(se = FALSE, method = "lm") +
    geom_point()

# Side-by-side summary: journalist-characteristics model vs. sourcing model.
etable(
    list(
        feols(n_comments ~ elite_undergrad_ivyplus + age_est_2017 + race.nonwhite + gender + field.journo + edu.has_postgrad, data = nyt.articles),
        feols(n_comments ~ cfscore + balance + n, data = nyt.articles)
    )
)
##                                      model 1          model 2
## Dependent Var.:                   n_comments       n_comments
##                                                              
## Constant                    426.6*** (74.89) 209.6*** (50.83)
## elite_undergrad_ivyplusTRUE    56.27 (36.95)                 
## age_est_2017                 -0.6828 (1.708)                 
## race.nonwhiteTRUE             -42.77 (53.97)                 
## gendermale                   -56.49. (33.13)                 
## field.journoTRUE            204.5*** (47.33)                 
## edu.has_postgradTRUE        -95.68** (31.52)                 
## cfscore                                        65.46* (32.36)
## balanceTRUE                                    -21.71 (33.78)
## n                                            24.97*** (6.369)
## ___________________________ ________________ ________________
## S.E. type                                IID              IID
## Observations                           1,225            1,085
## R2                                   0.03291          0.01811
## Adj. R2                              0.02815          0.01538
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

END